Data is extracted and cleaned using Python in simulation.ipynb. The Python notebook is also used to generate a rudimentary config file, but some things (network connectivity) are specified manually.
R reads the cleaned data from the spreadsheet and uses this to: * Create a graph structure * Make the data into a JAGS-readable format with some post-pre-processing
# read in config data
configsheets = excel_sheets(configpath)
for (sheet in configsheets) {
assign(sheet, read_excel(configpath, sheet))
}
stopifnot(!anyDuplicated(well_fp_map$well)) # each well cannot map to multiple flash plants
# read in regression data (plus extra)
regression_df = read_excel(regdatapath)
# dry_df = read_csv(extradatapath) %>% rename(mf = ip_sf)
dry_df = read_csv(extradatapath) %>% rename(well=facility)
## Parsed with column specification:
## cols(
## facility = col_character(),
## date = col_date(format = ""),
## key = col_character(),
## mf = col_double()
## )
regression_df = regression_df %>%
mutate(date_numeric = as.numeric(date - base_datetime)) %>%
mutate(date_numeric=ifelse(date_numeric>0, date_numeric, NA)) # remove dates before baseline
## Warning: package 'bindrcpp' was built under R version 3.4.4
dry_df = dry_df %>%
filter(well %ni% unique(regression_df$well)) %>%
mutate(date_numeric = as.numeric(as.POSIXct(date) - base_datetime)) %>%
mutate(date_numeric=ifelse(date_numeric>0, date_numeric, NA)) # remove dates before baseline
well_fp_map = well_fp_map %>% select(well, fp) %>%drop_na()
# today_numeric = (Sys.time() - base_datetime) %>% as.numeric()
today_numeric = (today_datetime - base_datetime) %>% as.numeric()
# assign unique facility IDs
liq_wells = unique(regression_df$well) # aka production curve wells
dry_wells = unique(dry_df$well) # aka time series wells
map_wells = unique(well_fp_map$well)
no_data_wells = map_wells[!map_wells %in% c(liq_wells, dry_wells)] # see which ones we're completely guessing for
no_map_wells = c(liq_wells, dry_wells)[!c(liq_wells, dry_wells) %in% map_wells]
well_names = c(liq_wells, dry_wells)
fp_names = c(well_fp_map$fp, fp_gen_map$fp, fp_constants$fp) %>% unique()
fluid_types = c('ip', 'lp', 'w')
gen_names = gen_constants$gen %>% unique() %>% sort()
ip_gen_names = paste(gen_names, 'ip', sep='_')
lp_gen_names = paste(gen_names, 'lp', sep='_')
w_gen_names = paste(gen_names, 'w', sep='_')
dummy_gen_names = c(ip_gen_names, lp_gen_names, w_gen_names) %>% sort()
all_names = c('DUMMY', well_names, fp_names, dummy_gen_names, gen_names)
ids = 1:length(all_names)
names(ids) = all_names
# add names in data with IDs
regression_df = regression_df %>% mutate(well_id=ids[well])
dry_df = dry_df %>% mutate(well_id=ids[well])
operating_conditions = operating_conditions %>% mutate(well_id=ids[well]) %>% rename(whp_pred=whp)
fp_constants = fp_constants %>% mutate(fp_id=ids[fp])
gen_constants = gen_constants %>% mutate(gen_id=ids[gen]) %>% select(-gen)
well_fp_map = well_fp_map %>% mutate(well_id=ids[well], fp_id=ids[fp]) %>% select(-c(well, fp))
fp_gen_map = fp_gen_map %>% mutate(fp_id=ids[fp], gen_ip_id=ids[gen_ip], gen_lp_id=ids[gen_lp], gen_w_id=ids[gen_w]) %>% select(-c(fp, gen_ip, gen_lp, gen_w))
# create connectivity matrix. i flows to j
# wells to FPs
v = matrix(0, nrow=length(ids), ncol=length(ids))
v[1,-1] = 1
for (i in 1:nrow(well_fp_map)) {
id_i = well_fp_map[[i, 1]]
id_j = well_fp_map[[i, 2]]
v[id_i, id_j] = 1
}
# send ip/lp/w flows to dummy gens
for (i in 1:nrow(fp_gen_map)) {
id_i = fp_gen_map[[i, 1]]
for (j in 2:ncol(fp_gen_map)) {
facility_j = names(ids)[fp_gen_map[[i, j]]]
facility_dummy_j = paste(facility_j, fluid_types[j-1], sep='_')
id_j = ids[facility_dummy_j]
if (!is.na(id_j)) {
v[id_i, id_j] = 1
}
}
}
# dummy gens to gens
for (i in 1:nrow(gen_constants)) {
id_j = gen_constants$gen_id[i]
facility_j = names(ids)[id_j]
for (fluid in fluid_types) {
facility_dummy_i = paste(facility_j, fluid, sep='_')
id_i = ids[facility_dummy_i]
v[id_i, id_j] = 1
}
}
# convert form
m = matrix(0, nrow=nrow(v), ncol=max(colSums(v)))
rownames(m) = all_names
for (i in 1:nrow(v)) {
for (j in 1:ncol(v)) {
if (v[[i, j]]==1) {
m[j, sum(m[j,]>0)+1] = i
}
}
}
# generate coordinates
dummy_locs = data.frame(name='DUMMY', x=-0.1, y=0)
well_locs = data.frame(name=well_names, x=0, y=seq(1, 1/(length(well_names)-1), length.out=length(well_names)))
fp_locs = data.frame(name=fp_names, x=1, y=seq(0, 1, length.out=length(fp_names)))
gen_dummy_locs = data.frame(name=dummy_gen_names, x=2, y=seq(0, 1, length.out=length(dummy_gen_names)))
gen_locs = data.frame(name=gen_names, x=2.5, y=seq(1/11, 10/11, length.out=length(gen_names)))
locs = rbind(dummy_locs, well_locs, fp_locs, gen_dummy_locs, gen_locs)
locs$id = ids[locs$name]
locs = locs %>% arrange(id)
g = graph_from_adjacency_matrix(v) %>%
set_vertex_attr('label', value=all_names) %>%
set_vertex_attr('x', value=as.vector(locs$x)) %>%
set_vertex_attr('y', value=as.vector(locs$y)) %>%
set_vertex_attr('label.degree', value=pi) %>%
as.undirected()
V(g)$size = ifelse(V(g)$label %in% well_names, 5, 8)
V(g)$color = ifelse(V(g)$label %in% dry_wells, "red", "orange")
E(g)$color = "black"
E(g)[which(tail_of(g, E(g))$label=="DUMMY")]$color = "grey"
# png("../media/full_network.png")
par(mar=c(0,3,0,0), family="Times")
plot(g, vertex.label.dist=3,
mark.groups = list(wells=ids[well_names], fps=ids[fp_names], gens=ids[gen_names]),
mark.col = "#EEEEEE",
mark.border = NA)
text(c(-1, -0.3, 0.4, 0.9), 1.15, c("Wells", "Flash plants", "Dummy gens", "Generators"), cex=1.25)
The dummy node is necessary because when indexing a subset of flows that go into a node, this subset cannot be empty. The dummy node has zero mass flowing out of it.
regression_list = regression_df %>% select(well_id, whp, mf, date_numeric) %>% as.list()
dry_list = dry_df %>%
rename(well_id_dry=well_id, mf_dry=mf, date_numeric_dry=date_numeric) %>% # use these in a different regression
select(well_id_dry, mf_dry, date_numeric_dry) %>% as.list()
operating_conditions_list = operating_conditions %>% arrange(well_id) %>% select(whp_pred) %>% as.list()
fp_constants_list = as.list(fp_constants)
gen_constants_list = as.list(gen_constants %>% select(gen_id, factor))
facilities = data.frame(id=1:max(ids)) %>%
full_join(operating_conditions %>% rename(id=well_id) %>% select(-well), by='id') %>%
full_join(gen_constants %>% select(factor, id=gen_id), by='id') %>%
full_join(fp_constants %>% rename(id=fp_id), by='id') %>%
mutate(mf_pred=NA) %>%
mutate(n_inflows=colSums(v))
## Warning: Column `id` has different attributes on LHS and RHS of join
## Warning: Column `id` has different attributes on LHS and RHS of join
## Warning: Column `id` has different attributes on LHS and RHS of join
well_ids = ids[well_names]
liq_well_ids = ids[liq_wells]
dry_well_ids = ids[dry_wells]
fp_ids = ids[fp_names]
ip_gen_ids = ids[ip_gen_names]
lp_gen_ids = ids[lp_gen_names]
w_gen_ids = ids[w_gen_names]
gen_ids = ids[gen_names]
# force all mass to IP steam
dry_fps = c("poi dry", "direct ip")
dry_fp_ids = ids[dry_fps]
facilities$hf_ip[facilities$id %in% dry_fp_ids] = 10
facilities$hfg_ip[facilities$id %in% dry_fp_ids] = 10
facilities_list = facilities %>% select(-id) %>% as.list()
# insert production curve predictions
prod = data.frame(whp_prod=seq(6, 16, length.out=10),
well_id_prod=ids[production_curve_well])
## Warning in data.frame(whp_prod = seq(6, 16, length.out = 10), well_id_prod
## = ids[production_curve_well]): row names were found from a short variable
## and have been discarded
ts = data.frame(date_numeric_ts=seq(max(dry_df$date_numeric)-30, max(dry_df$date_numeric)+30, length.out=10),
well_id_ts=ids[ts_well])
## Warning in data.frame(date_numeric_ts = seq(max(dry_df$date_numeric) -
## 30, : row names were found from a short variable and have been discarded
prod_list = prod %>% as.list
ts_list = ts %>% as.list
# experimental TS data matrix for dry wells
ar_order = 1
empty = setNames(data.frame(matrix(ncol = length(all_names), nrow = 0)), all_names)
drymatrix = dry_df %>%
select(well, date_numeric, mf) %>%
spread(well, mf) %>%
select(-date_numeric)
drymatrix = empty %>%
full_join(drymatrix) %>%
as.matrix()
## Joining, by = c("wk116", "wk123", "wk229", "wk26a", "wk26b", "wk27", "wk28", "wk46", "wk55", "wk67", "wk70", "wk71", "wk72", "wk74", "wk76", "wk81", "wk83", "wk118", "wk216", "wk228", "wk233", "wk234", "wk236", "wk237", "wk238", "wk240", "wk241", "wk249", "wk25", "wk250", "wk251", "wk252", "wk604", "wk605", "wk606", "wk607", "wk610", "wk65")
ar_well_ids = which(complete.cases(t(drymatrix[1:(ar_order+1),])))
# which wells can we not use AR for
dry_no_ar_wells = dry_wells[!dry_well_ids %in% ar_well_ids]
dry_no_ar_well_ids = ids[dry_no_ar_wells]
# extend matrix for prediction
days_since_first = as.integer(today_datetime - as.POSIXct(min(dry_df$date)))
drymatrix = rbind(drymatrix, matrix(NA, nrow=days_since_first, ncol=ncol(drymatrix)))
# combine into one list
data = c(regression_list, dry_list, facilities_list, prod_list, ts_list,
list(well_ids=well_ids, liq_well_ids=liq_well_ids,
dry_well_ids=dry_well_ids, dry_no_ar_well_ids=dry_no_ar_well_ids,
fp_ids=fp_ids,
gen_ids=gen_ids, ip_gen_ids=ip_gen_ids, lp_gen_ids=lp_gen_ids, w_gen_ids=w_gen_ids,
today_numeric=today_numeric, m=m, dummy=1,
ts=drymatrix, ts_ar=drymatrix, ts_ema=drymatrix, ar_well_ids=ar_well_ids))
data$whp_pred[is.na(data$whp_pred)] <- mean(data$whp_pred, na.rm=T)
# center covariates
mean_whp <- mean(data$whp, na.rm=T)
mean_date_numeric <- mean(data$date_numeric, na.rm=T)
data$whp_c <- data$whp - mean_whp
data$whp_pred_c <- data$whp_pred - mean_whp
data$whp_prod_c <- data$whp_prod - mean_whp
data$date_numeric_c <- data$date_numeric - mean_date_numeric
data$today_numeric_c <- data$today_numeric - mean_date_numeric
data$date_numeric_dry_c <- data$date_numeric_dry - mean_date_numeric
data$date_numeric_ts <- data$date_numeric_ts - mean_date_numeric
code = "
data {
D <- dim(ts)
}
model {
##############################################
# fit individual regressions to liquid wells #
##############################################
for (i in 1:length(mf)) {
mu[i] <- Intercept[well_id[i]] + beta_whp[well_id[i]] * whp_c[i] + beta_date[well_id[i]] * date_numeric_c[i]
mf[i] ~ dnorm(mu[i], tau[well_id[i]])
mf_fit[i] ~ dnorm(mu[i], tau[well_id[i]])
# mf_fit[i] ~ dnorm(mu[i]*measurement_error_factor[i], tau[well_id[i]])
# measurement_error_factor[i] ~ dunif(0.9, 1.1)
}
# fit regression to dry wells
for (i in 1:length(mf_dry)) {
mu_dry[i] <- Intercept[well_id_dry[i]] + beta_date[well_id_dry[i]] * date_numeric_dry_c[i]
mf_dry[i] ~ dnorm(mu_dry[i], tau[well_id_dry[i]])
mf_dry_fit[i] ~ dnorm(mu_dry[i], tau[well_id_dry[i]])
# measurement_error_factor_dry[i] ~ dunif(0.9, 1.1)
}
for (j in dry_well_ids) {
Intercept[j] ~ dnorm(0, 1e-12)
beta_date[j] ~ dnorm(0, 1e-12)
tau[j] ~ dgamma(1e-12, 1e-12)
}
# experimental AR1 model for dry wells
for (j in ar_well_ids) {
for (t in 2:D[1]) {
mu_ar[t,j] <- c_ar[j] + theta_ar[j]*ts_ar[t-1,j]
ts_ar[t,j] ~ dnorm(mu_ar[t,j], tau_ar[j]) T(0,)
}
theta_ar[j] ~ dnorm(0, 1e-12)
c_ar[j] ~ dnorm(0, 1e-12)
tau_ar[j] ~ dgamma(1e-12, 1e-12)
}
# experimental EMA model (don't use)
for (j in ar_well_ids) {
for (t in 2:D[1]) {
mu_ema[t,j] <- alpha*mu_ema[t-1,j] + (1-alpha)*ts_ema[t-1,j]
ts_ema[t,j] ~ dnorm(mu_ema[t,j], tau_ema[j]) T(0,)
}
mu_ema[1,j] <- ts_ema[1,j]
theta_ema[j] ~ dnorm(0, 1e-12)
c_ema[j] ~ dnorm(0, 1e-12)
tau_ema[j] ~ dgamma(1e-12, 1e-12)
}
alpha ~ dbeta(0.5, 0.5)
# HIERARCHICAL
# fills in for any missing wells
for (j in liq_well_ids) {
Intercept[j] ~ dnorm(mu_Intercept, tau_Intercept)
beta_whp[j] ~ dnorm(mu_beta_whp, tau_beta_whp)
beta_date[j] ~ dnorm(mu_beta_date, tau_beta_date)
tau[j] ~ dgamma(1e-12, 1e-12)
sd[j] <- 1/max(sqrt(tau[j]), 1e-12)
}
# fill in any missing data
for (i in 1:length(mf)) {
date_numeric_c[i] ~ dnorm(mu_date_numeric, tau_date_numeric)
}
mu_date_numeric ~ dnorm(0, 1e-12)
tau_date_numeric ~ dnorm(1e-12, 1e-12)
# set hyperparameters
mu_Intercept ~ dnorm(0, 1e-12)
mu_beta_whp ~ dnorm(0, 1e-12)
mu_beta_date ~ dnorm(0, 1e-12)
tau_Intercept ~ dgamma(1e-12, 1e-12)
tau_beta_whp ~ dgamma(1e-12, 1e-12)
tau_beta_date ~ dgamma(1e-12, 1e-12)
#####################################
# production curve for verification #
#####################################
for (i in 1:length(whp_prod)) {
mu_prod[i] <- Intercept[well_id_prod[i]] + beta_whp[well_id_prod[i]] * whp_prod_c[i] + beta_date[well_id_prod[i]] * today_numeric_c
mf_prod[i] ~ dnorm(mu_prod[i], tau[well_id_prod[i]])
}
for (i in 1:length(date_numeric_ts)) {
mu_ts[i] <- Intercept[well_id_ts[i]] + beta_date[well_id_ts[i]] * date_numeric_ts[i]
mf_ts[i] ~ dnorm(mu_ts[i], tau[well_id_ts[i]])
}
#########################################################
# simple model to fill in missing FP enthalpy constants #
#########################################################
for (i in fp_ids) {
# missing fp constants
hf_ip[i] ~ dgamma(param[1], param[7])
hg_ip[i] ~ dgamma(param[2], param[8])
hfg_ip[i] ~ dgamma(param[3], param[9])
hf_lp[i] ~ dgamma(param[4], param[10])
hg_lp[i] ~ dgamma(param[5], param[11])
hfg_lp[i] ~ dgamma(param[6], param[12])
}
for (i in c(1, well_ids)) { h[i] ~ dgamma(param[13], param[14]) } # missing well constants
for (i in 1:14) { param[i] ~ dgamma(1e-12, 1e-12) } # uniform priors
########################################
# make predictions (the stuff we want) #
########################################
mf_pred[dummy] <- 0 # dummy well
ip_sf[dummy] <- 0
lp_sf[dummy] <- 0
wf[dummy] <- 0
# use production curve
for (j in liq_well_ids) {
mf_pred[j] <- Intercept[j] + beta_whp[j] * whp_pred_c[j] + beta_date[j] * today_numeric_c
}
# use naive TS reg
for (j in dry_well_ids) { # dry_no_ar_well_ids) {
mf_pred[j] <- Intercept[j] + beta_date[j] * today_numeric_c
}
# use AR(1)
# for (j in ar_well_ids) {
# mf_pred[j] <- mu_ar[D[1], j]
# }
for (i in fp_ids) {
mf_pred[i] <- sum(mf_pred[m[i,1:n_inflows[i]]])
h[i] <- sum(mf_pred[m[i, 1:n_inflows[i]]] * h[m[i, 1:n_inflows[i]]]) / ifelse(mf_pred[i]!=0, mf_pred[i], 1)
ip_sf[i] <- min(max((h[i] - hf_ip[i]), 0) / hfg_ip[i], 1) * mf_pred[i]
lp_sf[i] <- min(max((min(hf_ip[i], h[i]) - hf_lp[i]), 0) / hfg_lp[i], 1) * (mf_pred[i] - ip_sf[i])
total_sf[i] <- ip_sf[i] + lp_sf[i]
wf[i] <- mf_pred[i] - total_sf[i]
}
# dummy gens and actual gens
for (i in ip_gen_ids) { mf_pred[i] <- sum(ip_sf[m[i, 1:n_inflows[i]]]) }
for (i in lp_gen_ids) { mf_pred[i] <- sum(lp_sf[m[i, 1:n_inflows[i]]]) }
for (i in w_gen_ids) { mf_pred[i] <- sum(wf[m[i, 1:n_inflows[i]]]) }
for (i in gen_ids) {
mf_pred[i] <- sum(mf_pred[m[i,1:n_inflows[i]]])
power[i] <- mf_pred[i] / mu_factor[i]
mu_factor[i] ~ dunif(0.95*factor[i], 1.05*factor[i]) # uncertainty from email
}
total_power <- sum(power[gen_ids])
}
"
vars = c('mf_fit',
'mf_dry_fit',
'mf_ts',
'mf_prod',
'mf_pred',
'beta_date',
'sd',
'power',
'total_sf',
'mu_ar',
'mu_ema',
'alpha',
# paste0('m_ts[', ar_well_ids, ']'),
paste0('h[', fp_ids, ']'),
paste0('mu_', c('Intercept', 'beta_whp', 'beta_date')),
'total_power')
n_chains = 2
burn_in = 100
n_steps = 1000
model = jags.model(textConnection(code), data, n.chains=n_chains)
## Warning in jags.model(textConnection(code), data, n.chains = n_chains):
## Unused variable "whp" in data
## Warning in jags.model(textConnection(code), data, n.chains = n_chains):
## Unused variable "date_numeric" in data
## Warning in jags.model(textConnection(code), data, n.chains = n_chains):
## Unused variable "date_numeric_dry" in data
## Warning in jags.model(textConnection(code), data, n.chains = n_chains):
## Unused variable "whp_pred" in data
## Warning in jags.model(textConnection(code), data, n.chains = n_chains):
## Unused variable "fp" in data
## Warning in jags.model(textConnection(code), data, n.chains = n_chains):
## Unused variable "limit" in data
## Warning in jags.model(textConnection(code), data, n.chains = n_chains):
## Unused variable "dry_no_ar_well_ids" in data
## Warning in jags.model(textConnection(code), data, n.chains = n_chains):
## Unused variable "today_numeric" in data
## Compiling data graph
## Resolving undeclared variables
## Allocating nodes
## Initializing
## Reading data back into data table
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 4800
## Unobserved stochastic nodes: 4623
## Total graph size: 31557
##
## Initializing model
update(model, burn_in)
out = coda.samples(model, n.iter=round(n_steps/n_chains), variable.names=vars)
outmatrix = as.matrix(out)
outframe = as.data.frame(outmatrix) %>%
gather(key=facility, value=value) %>%
mutate(variable=gsub("\\[.*$", "", facility), facility=parse_number(facility, na=c("NA")))
## Warning: 5000 parsing failures.
## row # A tibble: 5 x 4 col row col expected actual expected <int> <int> <chr> <chr> actual 1 1 NA a number alpha row 2 2 NA a number alpha col 3 3 NA a number alpha expected 4 4 NA a number alpha actual 5 5 NA a number alpha
## ... ................. ... ............................. ........ ............................. ...... ............................. ... ............................. ... ............................. ........ ............................. ...... .............................
## See problems(...) for more details.
outframe$facility = factor(names(ids)[outframe$facility])
# for over-plotting
special_wells = c('wk124', 'wk242', 'wk263', 'wk270', 'wk271')
g1 = ggplot(outframe %>%
filter(facility %in% well_names, variable=="mf_pred", value>0) %>%
mutate(source = ifelse(facility %in% dry_wells, "PI time series", "Production curve")),
aes(x=value, fill=facility)) +
geom_density(aes(y=..scaled..), alpha=0.5, color=NA) + xlim(0, NA) +
facet_grid(source~.) +
labs(title="Posterior Well Mass Flows for 2018", x="Mass flow (T/h)", y="Density", fill="Facility") +
ggsave('../media/mf_wells.png', width=6, height=4, units='in')
g2 = ggplot(outframe %>% filter(variable=="beta_date") %>% filter(facility %in% special_wells), aes(x=value, fill=facility)) +
geom_density(alpha=0.5, color=NA) +labs(title="Posterior Decline Rate of Test Data", x="beta_date (T/h/Bar)", y="Density", fill="Facility") +
ggsave('../media/beta_date.png', width=6, height=4, units='in')
g3 = ggplot(outframe %>% filter(facility %in% fp_names, variable=="mf_pred", value>0), aes(x=value, fill=facility)) +
geom_density(aes(y=..scaled..), alpha=0.5, color=NA) + xlim(0, NA) +
labs(title="Posterior Flash Plant Mass Flows for 2018", x="Mass flow (T/h)") +
ggsave('../media/mf_fps.png', width=6, height=4, units='in')
g4 = ggplot(outframe %>% filter(facility %in% gen_names, variable=="mf_pred", value>0), aes(x=value, fill=facility)) +
geom_density(aes(y=..scaled..), alpha=0.5, color=NA) + xlim(0, NA) +
labs(title="Posterior Generator Mass Flows for 2018", x="Mass flow (T/h)", y="Scaled density", fill="Facility") +
ggsave('../media/mf_gens.png', width=6, height=4, units='in')
g5 = ggplot(outframe %>% filter(facility %in% gen_names, variable=="power", value>0), aes(x=value, fill=facility)) +
geom_density(aes(y=..scaled..), alpha=0.5, color=NA) + xlim(0, NA) +
labs(title="Posterior Generator Power Output for 2018", x="Power (MW)", y="Scaled density", fill="Facility") +
ggsave('../media/power_gens.png', width=6, height=4, units='in')
tb6 <- outframe %>% filter(variable=="sd") %>% select(facility, value) %>%
mutate(well=factor(facility)) %>%
group_by(well) %>%
summarise(Mean = mean(value),
`Lower 2.5%` = quantile(value, 0.025),
`Upper 97.5%` = quantile(value, 0.975)) %>%
mutate_if(is.numeric, round, 3) %>%
inner_join(regression_df %>% mutate(well=factor(names(ids)[well_id])) %>% group_by(well) %>% summarise(n=n()), by="well")
g6 = ggplot(outframe %>% filter(variable=="sd") %>% filter(facility %in% special_wells), aes(x=value, fill=facility)) +
geom_density(alpha=0.5, color=NA) + coord_cartesian(xlim=c(0, max(tb6$`Upper 97.5%`))) +
labs(title="Posterior Flow Deviation Estimates", x="Standard deviation", y="Density", fill="Facility") +
ggsave('../media/standard_deviation.png', width=6, height=4, units='in')
ggplotly(g1, tooltip=c('facility', 'value'))
ggplotly(g2, tooltip=c('facility', 'value'))
ggplotly(g3, tooltip=c('facility', 'value'))
ggplotly(g4, tooltip=c('facility', 'value'))
ggplotly(g5, tooltip=c('facility', 'value'))
ggplotly(g6, tooltip=c('facility', 'value'))
# g1; g2; g3; g4; g5; g6
tb2 <- outframe %>% filter(variable=="beta_date") %>% select(facility, value) %>%
mutate(well=factor(facility)) %>%
group_by(well) %>%
summarise(Mean = mean(value),
`Lower 2.5%` = quantile(value, 0.025),
`Upper 97.5%` = quantile(value, 0.975)) %>%
mutate_if(is.numeric, round, 3) %>%
inner_join(regression_df %>% mutate(well=factor(names(ids)[well_id])) %>% group_by(well) %>% summarise(n=n()), by="well")
## Warning: Column `well` joining factors with different levels, coercing to
## character vector
tb2[tb2$well %in% special_wells,] %>% knitr::kable()
| well | Mean | Lower 2.5% | Upper 97.5% | n |
|---|---|---|---|---|
| wk124 | -0.062 | -0.085 | -0.039 | 31 |
| wk242 | 0.019 | -0.009 | 0.047 | 28 |
| wk263 | -0.014 | -0.021 | -0.007 | 34 |
| wk270 | -0.083 | -0.207 | 0.027 | 5 |
| wk271 | -0.071 | -0.196 | 0.032 | 6 |
tb6[tb6$well %in% special_wells,] %>% knitr::kable()
| well | Mean | Lower 2.5% | Upper 97.5% | n |
|---|---|---|---|---|
| wk124 | 22.816 | 17.403 | 30.203 | 31 |
| wk242 | 148.061 | 111.727 | 199.446 | 28 |
| wk263 | 13.987 | 11.184 | 18.154 | 34 |
| wk270 | 35.757 | 13.654 | 93.629 | 5 |
| wk271 | 107.769 | 51.155 | 253.192 | 6 |
| # Hyper- | parameters |
hp.df <- outframe %>% filter(variable %in% c("mu_beta_date", "mu_beta_whp", "mu_Intercept"))
hp.quantiles <- hp.df %>%
group_by(variable) %>%
summarise(`Lower 2.5%` = quantile(value, 0.025),
`Upper 97.5%` = quantile(value, 0.975)) %>%
gather(key='Quantile', value='value', `Lower 2.5%`, `Upper 97.5%`)
coefplot = ggplot(hp.df, aes(x=value, fill=variable)) +
facet_wrap(~variable, nrow=3, scales = "free") +
geom_density(aes(y=..scaled..), alpha=0.5, color=NA) +
geom_vline(data=hp.quantiles, aes(xintercept=value, color=Quantile)) +
labs(title="Posterior Regression Coefficients", x="Value", y="Density", fill="Variable")
ggplotly(coefplot)
prod = as.data.frame(outmatrix) %>%
select(contains('prod')) %>%
gather(key=facility, value=value) %>%
mutate(which=parse_number(facility)) %>%
mutate(whp=data$whp_prod[which]) %>%
rename(mf=value) %>%
group_by(whp) %>%
summarise(lower=quantile(mf, 0.025),
upper=quantile(mf, 0.975),
mean=mean(mf))
plotdata = regression_df %>%
filter(well_id==ids[production_curve_well]) %>%
mutate(datetime = factor(as.Date(date)))
ggplot(prod, aes(x=whp)) +
geom_line(aes(y=mean, lty="Bayesian Regression"), color='red') +
# geom_line(data=reglines, aes(y=mf, group=datetime, lty="OLS Regression")) +
geom_ribbon(aes(ymin=lower, ymax=upper), alpha=0.25, fill='red') +
geom_point(data=plotdata, aes(y=mf, color=date)) +
labs(title=paste("Fitted Production Curve for", production_curve_well), x="Well-head pressure (bar)", y="Mass flow (T/h)", color="Date", linetype="") +
coord_cartesian(xlim=c(min(plotdata$whp)*0.875,max(plotdata$whp)*1.125), ylim=c(0,max(plotdata$mf)*1.125)) +
ggsave('../media/production_curve.png', width=6, height=4, units='in')
ts_out = as.data.frame(outmatrix) %>%
select(contains('ts')) %>%
gather() %>%
mutate(which=parse_number(key)) %>%
mutate(date_numeric=data$date_numeric_ts[which] + mean_date_numeric) %>%
rename(mf=value) %>%
group_by(date_numeric) %>%
summarise(lower=quantile(mf, 0.025),
upper=quantile(mf, 0.975),
mean=mean(mf))
tsplotdata = dry_df %>%
filter(well_id==ids[ts_well]) %>%
mutate(datetime = factor(as.Date(date)))
ggplot(ts_out, aes(x=date_numeric)) +
geom_line(aes(y=mean, lty="Bayesian Regression"), color='red') +
geom_ribbon(aes(ymin=lower, ymax=upper), alpha=0.25, fill='red') +
geom_line(data=tsplotdata, aes(y=mf, color=date)) +
ylim(0, NA) +
labs(title=paste("Fitted Linear Time Series for", ts_well), x="Days since baseline (2000)", y="Mass flow (T/h)", color="Date", linetype="") +
ggsave('../media/dry_time_series.png', width=6, height=4, units='in')
# experimental AR1 time series
ar_fit = as.data.frame(outmatrix) %>%
select(contains("mu_ar")) %>%
gather() %>%
mutate(t = as.numeric(str_extract(key, "(?<=\\[)(.*?)(?=,)"))) %>%
mutate(which = as.numeric(str_extract(key, "(?<=,)(.*?)(?=\\])"))) %>%
mutate(facility = names(ids)[which]) %>%
select(facility, t, value) %>%
group_by(facility, t) %>%
summarise(mean=mean(value),
lower=quantile(value, 0.025),
upper=quantile(value, 0.975))
actualts = drymatrix %>% as.data.frame() %>%
mutate(t = 1:nrow(drymatrix)) %>%
gather(key="facility", value="value", -t) %>%
filter(facility %in% names(ids)[ar_well_ids])
arplot = ggplot(ar_fit, aes(x=t, y=mean, fill=facility, color=facility)) +
geom_line(data=actualts, aes(y=value)) +
geom_ribbon(aes(ymin=lower, ymax=upper), color=NA, alpha=0.5) +
geom_line(linetype="dashed") + coord_cartesian(ylim=c(0, 50)) +
labs(title="AR(1) Experiment", x="Days since first date", y="Mass flow (T/h)") +
ggsave('../media/ar_experiment.png', width=6, height=4, units='in')
## Warning: Removed 1082 rows containing missing values (geom_path).
# experimental EMA time series
ema_fit = as.data.frame(outmatrix) %>%
select(contains("mu_ema")) %>%
gather() %>%
mutate(t = as.numeric(str_extract(key, "(?<=\\[)(.*?)(?=,)"))) %>%
mutate(which = as.numeric(str_extract(key, "(?<=,)(.*?)(?=\\])"))) %>%
mutate(facility = names(ids)[which]) %>%
select(facility, t, value) %>%
group_by(facility, t) %>%
summarise(mean=mean(value),
lower=quantile(value, 0.025),
upper=quantile(value, 0.975))
emaplot = ggplot(ema_fit, aes(x=t, y=mean, fill=facility, color=facility)) +
geom_line(data=actualts, aes(y=value)) +
geom_ribbon(aes(ymin=lower, ymax=upper), color=NA, alpha=0.5) +
geom_line(linetype="dashed") + coord_cartesian(ylim=c(0, 50)) +
labs(title="EWMA Experiment", x="Days since first date", y="Mass flow (T/h)") +
ggsave('../media/ema_experiment.png', width=6, height=4, units='in')
## Warning: Removed 1082 rows containing missing values (geom_path).
ggplotly(arplot)
ggplotly(emaplot)
trace1 <- outframe %>%
filter(variable=='mf_pred' & facility=='wk256')
trace2 <- outframe %>%
filter(variable=='total_power')
trace3 <- outframe %>%
filter(variable=='mu_Intercept')
traceplot = ggplot(trace1, aes(y=value, color=variable)) +
geom_line(aes(x=as.numeric(row.names(trace1)))) +
geom_line(data=trace2, aes(x=as.numeric(row.names(trace1)))) +
geom_line(data=trace3, aes(x=as.numeric(row.names(trace1)))) +
labs(title="Trace Plots (Single chain)", x="Iteration", y="Node value", color="Variable") +
ggsave('../media/trace_plots.png', width=6, height=4, units='in')
ggplotly(traceplot)
liq_fit = as.data.frame(outmatrix) %>%
select(contains('mf_fit')) %>%
gather(key='index', value='fitted') %>%
mutate(index=as.integer(parse_number(index))) %>%
group_by(index) %>%
summarise(lower=quantile(fitted, 0.025),
upper=quantile(fitted, 0.975),
Fitted=mean(fitted),
std=sd(fitted)) %>%
cbind(regression_df) %>%
mutate(`Standardised residual` = (Fitted-mf)/std,
Well = factor(names(ids[well_id])),
Observed = mf) %>%
gather(key="key", value="value", `Standardised residual`, Observed) %>%
select(Well, key, Fitted, value)
diagplot = ggplot(liq_fit, aes(x=Fitted, y=value)) +
geom_point(aes(color=Well, shape=Well)) + scale_shape_manual(values = 1:length(levels(liq_fit$Well))) +
geom_smooth(color='black') +
facet_grid(key~., scales="free_y", switch="y") +
geom_hline(data=data.frame(key="Standardised residual", value=c(1.96,-1.96)), aes(yintercept=value), color='red') +
labs(title="Diagnostic Plots", x="Fitted mass flow (T/h)", y="") +
ggsave('../media/diagnostics.png', width=6, height=4, units='in')
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplotly(diagplot)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
stdres_min = liq_fit %>% filter(key=="Standardised residual") %>% pull(value) %>% min()
stdres_max = liq_fit %>% filter(key=="Standardised residual") %>% pull(value) %>% max()
resdesplot = ggplot(liq_fit %>% filter(key=="Standardised residual"), aes(x=value)) +
geom_density(fill="red", alpha=0.5, color=NA) +
geom_line(data=data.frame(x=seq(stdres_min, stdres_max, length.out=100)), aes(x=x, y=dnorm(x)))
ggplotly(resdesplot)
sf.df <- outframe %>%
filter(str_detect(variable, "total_sf") & value > 0) %>%
droplevels()
limits = fp_constants %>%
mutate(facility = names(ids)[fp_id]) %>%
select(facility, limit) %>%
drop_na()
limitplot = ggplot(sf.df, aes(x=value, fill=facility)) +
facet_grid(facility~., scales = "free_y", switch="y") +
geom_density(color=NA, alpha=0.5) +
geom_vline(data=limits, aes(xintercept=limit, color=facility)) +
labs(title="Posterior Flash Plant Mass Flows", x="Steam flow (T/h)", y="Density", fill="Flash plant", color="Steam flow limit") +
ggsave('../media/constraints.png', width=6, height=4, units='in')
ggplotly(limitplot)
random_var_ix = sample.int(ncol(outmatrix), 100) # 100 random var because it takes too long
geweke.diag(out[,1:100])
## [[1]]
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## alpha beta_date[2] beta_date[3] beta_date[4] beta_date[5]
## 0.952808 -3.882998 0.136275 1.380467 -1.157648
## beta_date[6] beta_date[7] beta_date[8] beta_date[9] beta_date[10]
## 0.593783 1.357520 -1.966898 -3.509355 -0.312900
## beta_date[11] beta_date[12] beta_date[13] beta_date[14] beta_date[15]
## 0.148635 -1.519220 -0.311158 -0.107042 -2.133065
## beta_date[16] beta_date[17] beta_date[18] beta_date[19] beta_date[20]
## 0.673966 0.374221 1.150058 0.748739 0.551063
## beta_date[21] beta_date[22] beta_date[23] beta_date[24] beta_date[25]
## -0.593603 0.588185 0.106633 1.676510 1.086270
## beta_date[26] beta_date[27] beta_date[28] beta_date[29] beta_date[30]
## 0.018949 -5.867607 -0.647338 -0.303219 8.064064
## beta_date[31] beta_date[32] beta_date[33] beta_date[34] beta_date[35]
## -2.967810 -6.160783 4.450616 -8.445947 -1.137245
## beta_date[36] beta_date[37] beta_date[38] beta_date[39] beta_date[40]
## 1.895352 -10.960913 -5.790536 -0.013814 0.261345
## beta_date[41] beta_date[42] beta_date[43] beta_date[44] beta_date[45]
## 21.211797 -3.211275 0.453401 0.583987 -0.898446
## beta_date[46] beta_date[47] beta_date[48] beta_date[49] beta_date[50]
## 0.975961 1.751256 11.515047 2.236298 -1.752002
## beta_date[51] beta_date[52] beta_date[53] beta_date[54] beta_date[55]
## -0.526641 -20.849932 0.717295 1.643243 -5.295595
## beta_date[56] beta_date[57] beta_date[58] beta_date[59] beta_date[60]
## 4.062297 -6.711596 6.048649 -7.083123 5.491588
## beta_date[61] beta_date[62] beta_date[63] h[64] h[65]
## -14.323486 -7.095878 0.979930 -0.555846 0.430803
## h[66] h[67] h[68] h[69] h[70]
## -0.055280 -0.738454 0.613307 -0.756644 0.172275
## h[71] h[72] h[73] h[74] mf_dry_fit[1]
## -0.204978 0.272638 -0.343717 NaN -0.440623
## mf_dry_fit[2] mf_dry_fit[3] mf_dry_fit[4] mf_dry_fit[5] mf_dry_fit[6]
## 0.263039 -1.106354 1.998523 0.775413 -1.093132
## mf_dry_fit[7] mf_dry_fit[8] mf_dry_fit[9] mf_dry_fit[10] mf_dry_fit[11]
## -0.307279 0.467322 0.036460 1.533074 -0.551737
## mf_dry_fit[12] mf_dry_fit[13] mf_dry_fit[14] mf_dry_fit[15] mf_dry_fit[16]
## -0.176634 -1.441030 -1.144953 -0.008069 0.511269
## mf_dry_fit[17] mf_dry_fit[18] mf_dry_fit[19] mf_dry_fit[20] mf_dry_fit[21]
## 1.204811 0.076522 0.286230 -2.382012 -0.238553
## mf_dry_fit[22] mf_dry_fit[23] mf_dry_fit[24] mf_dry_fit[25] mf_dry_fit[26]
## -0.493573 1.553093 0.397984 -0.083341 0.107929
##
##
## [[2]]
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## alpha beta_date[2] beta_date[3] beta_date[4] beta_date[5]
## -0.95709 1.04328 0.96737 -1.10546 -1.16558
## beta_date[6] beta_date[7] beta_date[8] beta_date[9] beta_date[10]
## -0.14319 -2.56538 -0.77801 -1.64584 -1.50482
## beta_date[11] beta_date[12] beta_date[13] beta_date[14] beta_date[15]
## 0.57578 -0.62893 0.36000 0.28266 -1.99767
## beta_date[16] beta_date[17] beta_date[18] beta_date[19] beta_date[20]
## -0.27262 1.82941 -0.22753 -0.14895 1.46035
## beta_date[21] beta_date[22] beta_date[23] beta_date[24] beta_date[25]
## 2.54816 0.20595 0.28710 0.06958 -0.19985
## beta_date[26] beta_date[27] beta_date[28] beta_date[29] beta_date[30]
## 1.81657 0.30120 1.18317 -7.91374 0.45091
## beta_date[31] beta_date[32] beta_date[33] beta_date[34] beta_date[35]
## 3.53159 -2.28784 10.86865 1.82641 12.61772
## beta_date[36] beta_date[37] beta_date[38] beta_date[39] beta_date[40]
## -0.17237 -1.58323 -10.33315 0.22985 3.49113
## beta_date[41] beta_date[42] beta_date[43] beta_date[44] beta_date[45]
## -7.23616 -0.40148 2.87105 6.07667 -4.89801
## beta_date[46] beta_date[47] beta_date[48] beta_date[49] beta_date[50]
## -13.72143 -5.85147 -7.07283 -2.07676 -0.30025
## beta_date[51] beta_date[52] beta_date[53] beta_date[54] beta_date[55]
## 0.83505 5.61170 -0.13402 -11.83544 -1.78355
## beta_date[56] beta_date[57] beta_date[58] beta_date[59] beta_date[60]
## 7.48855 4.66103 6.28791 1.79025 -4.04766
## beta_date[61] beta_date[62] beta_date[63] h[64] h[65]
## -8.84464 -11.40829 -3.04260 -0.08750 0.18378
## h[66] h[67] h[68] h[69] h[70]
## -1.61666 -0.07449 1.01967 -0.48977 -0.62089
## h[71] h[72] h[73] h[74] mf_dry_fit[1]
## -0.17065 0.72195 -0.37794 NaN -1.20271
## mf_dry_fit[2] mf_dry_fit[3] mf_dry_fit[4] mf_dry_fit[5] mf_dry_fit[6]
## -0.21124 -1.14957 -0.28674 -1.53528 -0.14007
## mf_dry_fit[7] mf_dry_fit[8] mf_dry_fit[9] mf_dry_fit[10] mf_dry_fit[11]
## -0.87801 0.64351 -1.67410 -0.91101 0.19235
## mf_dry_fit[12] mf_dry_fit[13] mf_dry_fit[14] mf_dry_fit[15] mf_dry_fit[16]
## 1.20617 -0.84323 0.66160 -2.79829 -0.16462
## mf_dry_fit[17] mf_dry_fit[18] mf_dry_fit[19] mf_dry_fit[20] mf_dry_fit[21]
## -1.93559 -0.80735 1.86436 1.23058 -1.31262
## mf_dry_fit[22] mf_dry_fit[23] mf_dry_fit[24] mf_dry_fit[25] mf_dry_fit[26]
## -2.52667 -0.32557 -0.13842 2.59120 0.81179
gelman.diag(out[,c(paste0('mf_pred[', 8:9, ']'), 'beta_date[9]', 'mu_beta_whp', 'mu_beta_date', 'mu_Intercept', 'total_power')])[[1]] %>% as.data.frame() %>% round(2)
| Point est. | Upper C.I. | |
|---|---|---|
| mf_pred[8] | 1 | 1.02 |
| mf_pred[9] | 1 | 1.00 |
| beta_date[9] | 1 | 1.00 |
| mu_beta_whp | 1 | 1.00 |
| mu_beta_date | 1 | 1.01 |
| mu_Intercept | 1 | 1.00 |
| total_power | 1 | 1.01 |
raftery.diag(out[,c(paste0('mf_pred[', 8:9, ']'), 'beta_date[9]', 'mu_beta_whp', 'mu_beta_date', 'mu_Intercept', 'total_power')])
## [[1]]
##
## Quantile (q) = 0.025
## Accuracy (r) = +/- 0.005
## Probability (s) = 0.95
##
## You need a sample size of at least 3746 with these values of q, r and s
##
## [[2]]
##
## Quantile (q) = 0.025
## Accuracy (r) = +/- 0.005
## Probability (s) = 0.95
##
## You need a sample size of at least 3746 with these values of q, r and s
testx = seq(0, 10, 1)
testy = 2*testx
N = length(testx)
testdata = list(x=testx, y=testy, N=N)
testcode = "
model {
for (i in 1:N) {
mu[i] <- alpha + beta * x[i]
y[i] ~ dnorm(mu[i], tau)
}
alpha ~ dnorm(0, 1e-8)
beta ~ dnorm(0, 1e-8)
tau ~ dgamma(1e-8, 1e-8)
z ~ dnorm(0, 1)
trunc <- max(min(z, 1), -1)
}
"
testmodel = jags.model(textConnection(testcode), testdata, n.chains=1)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 11
## Unobserved stochastic nodes: 4
## Total graph size: 55
##
## Initializing model
update(testmodel, burn_in)
testout = coda.samples(testmodel, n.iter=10000, variable.names=c("beta", "alpha", "z", "trunc")) %>%
as.matrix() %>%
as.data.frame() %>%
gather()
ggplot(testout, aes(x=value, fill=key)) +
geom_histogram(color=NA, alpha=0.5) +
facet_grid(key~., scales="free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.